In VISUALIZATION VIBES project Study 2, participants completed an attitutde eliciation survey, asking questions about their attitude toward (5) stimulus images (data visualizations). Each participant was randomly assigned to one of 6 stimulus blocks, each containing 1 image from each of (4) pseudo-categories (ranging from most abstract to most figural). Each participant started by responding to questions for a single ‘common’ stimulus (that is thus super-powered as it was seen by all participants). Two participant recruitment pools were used: Prolific, with a smaller set of participants recruited from Tumblr (to replicate and compare survey results to Study 1 interviews with participants sourced from Tumblr).
This notebook contains code to replicate quantitative analysis of data from Study 2 reported in the CHI submission. Note that due to limited space, we were unable to report results for all stimulus blocks, and all possible analyses. A separate set of R notebooks are included in the supplementary materials that document analysis of the other blocks not reported here.
This notebook includes analysis and exploration of the full data set (i.e. data aggregated over all stimuli).
We start by importing data files previously wrangled in
0_VIBES_S2_wrangling.Rmd.
############## IMPORT REFERENCE FILES
ref_stimuli <- readRDS("data/input/REFERENCE/ref_stimuli.rds")
ref_surveys <- readRDS("data/input/REFERENCE/ref_surveys.rds")
ref_labels <- readRDS("data/input/REFERENCE/ref_labels.rds")
ref_labels_abs <- readRDS("data/input/REFERENCE/ref_labels_abs.rds")
############## SETUP Graph Labels
ref_stim_id <- levels(ref_stimuli$ID)
ref_cat_questions <- c("MAKER_ID","MAKER_AGE","MAKER_GENDER")
ref_free_response <- c("MAKER_DETAIL", "MAKER_EXPLAIN", "TOOL_DETAIL", "CHART_EXPLAIN")
ref_conf_questions <- c("MAKER_CONF", "AGE_CONF", "GENDER_CONF", "TOOL_CONF")
ref_sd_questions <- rownames(ref_labels)
ref_sd_questions_abs <- rownames(ref_labels_abs)
# ref_blocks <- c("block1", "block2", "block3", "block4", "block5", "block6")
ref_blocks <- c(1,2,3,4,5,6)
############## IMPORT DATA FILES
# df_data <- readRDS("data/output/df_data.rds") #1 row per participant — WIDE
df_participants <- readRDS("data/output/df_participants.rds") #1 row per participant — demographic
df_questions <- readRDS("data/output/df_questions.rds") #1 row per question — LONG
df_sd_questions_wide <- readRDS("data/output/df_sd_questions_wide.rds") # only sd questions WIDE
df_tools <- readRDS("data/output/df_tools.rds") #multiselect format for tools Question
df_actions <- readRDS("data/output/df_actions.rds") # multiselect format for action Question
# # df_graphs_full <- readRDS("data/output/df_graphs_full.rds") #includes free response data
df_graphs <- readRDS("data/output/df_graphs.rds") #only categorical and numeric questions
df_sd_questions_long <- readRDS("data/output/df_sd_questions_long.rds") # only sd questions LONG
### DATA FILES WITH (VARIABLE-WISE) Z-SCORED SEMANTIC DIFFERENTIAL QS
df_graphs_z <- readRDS("data/output/df_graphs_z.rds") #only categorical and numeric questions
df_sd_questions_long_z <- readRDS("data/output/df_sd_questions_long_z.rds") # only sd questions LONG
### DATA FILES WITH ABSOLUTE VALUE SEMANTIC DIFFERENTIAL QS
df_graphs_abs <- readRDS("data/output/df_graphs_abs.rds") #only categorical and numeric questions
df_sd_questions_long_abs <- readRDS("data/output/df_sd_questions_long_abs.rds") # only sd questions LONG
############## SETUP Colour Palettes
#https://www.r-bloggers.com/2022/06/custom-colour-palettes-for-ggplot2/
## list of color pallettes
my_colors = list(
politics = c("#184aff","#5238bf", "#4f4a52" ,"#84649c", "#ff0000"),
blackred = c("black","red"),
greys = c("#3D3D3D","#7A7A7A","#A3A3A3"),
greens = c("#ADC69D","#81A06D","#567E39","#2D5D16","#193E0A"),
smallgreens = c("#ADC69D","#567E39","#193E0A"),
olives = c("#CDCEA1","#B8B979","#A0A054","#78783F","#50502A","#35351C"),
lightblues = c("#96C5D2","#61A2B2","#3C8093","#2C6378","#1F4A64"),
darkblues = c("#7AAFE1","#3787D2","#2A73B7","#225E96","#1A4974","#133453"),
reds = c("#D9B8BD","#CE98A2","#B17380","#954E5F","#78263E","#62151F"),
traffic = c("#CE98A2","#81A06D","yellow"),
questions = c("#B17380","#3787D2", "#567E39", "#EE897F"),
tools= c("#D55662","#EE897F","#F5D0AD","#A0B79B","#499678","#2D363A"),
encounter = c("#729B7D","#8E8E8E"),
actions = c("#2A363B","#039876ff","#99b898ff","#fdcea8ff","#ff837bff","#e84a60ff"),
platforms = c("#5D93EA","#FF70CD", "#3BD3F5", "#8B69B5","black"),
amy_gradient = c("#ac57aa", "#9e5fa4", "#90689f", "#827099", "#747894", "#66818e", "#578988", "#499183", "#3b997d", "#2da278", "#1faa72"),
my_favourite_colours = c("#702963", "#637029", "#296370")
)
## function for using palettes
my_palettes = function(name, n, all_palettes = my_colors, type = c("discrete","continuous"), direction = c("1","-1")) {
palette = all_palettes[[name]]
if (missing(n)) {
n = length(palette)
}
type = match.arg(type)
out = switch(type,
continuous = grDevices::colorRampPalette(palette)(n),
discrete = palette[1:n]
)
out = switch(direction,
"1" = out,
"-1" = palette[n:1])
structure(out, name = name, class = "palette")
}
############## RETURNS SD STACKED AND COLORED BY BY X
## LOOP STYLE
multi_sd <- function (data, left, right, x, y, color) {
# g <- ggplot(df, aes(y = .data[[x]], x = {{y}}, color = {{color}}))+
g <- ggplot(data, aes(y = .data[[x]], x = .data[[y]], color = .data[[color]]))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
############## RETURNS SINGLE SD
## LOOP STYLE
single_sd <- function (data, left, right, x) {
g <- ggplot(data, aes(y = {{x}}, x = ""))+
geom_boxplot(width = 0.5) +
geom_jitter(width = 0.1, alpha=0.5) +
scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
guides(
y = guide_axis_manual(labels = left),
y.sec = guide_axis_manual(labels = right)
) + theme_minimal()
return(g)
}
######## RETURNS SINGLE SD
## APPLY STYLE
plot_sd = function (data, column, type, mean, facet, facet_by, boxplot) {
ggplot(data, aes(y = .data[[column]], x="")) +
{if(boxplot) geom_boxplot(width = 0.5) } +
geom_jitter(width = 0.1, alpha=0.2, {if(facet) aes(color=.data[[facet_by]])}) +
{if(mean) stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue")} +
{if(mean) stat_summary(fun="mean", geom="text", colour="blue", fontface = "bold",
vjust=-1.25, hjust = 0.50, aes( label=round(..y.., digits=0))) } +
{if(facet) facet_grid(.data[[facet_by]] ~ .)} +
# scale_y_continuous(limits=c(-1,101)) +
labs(x="", y="") +
coord_flip() +
{if(type == "S")
guides(
y = guide_axis_manual(labels = ref_labels[column,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[column,"right"])
)} +
{if(type == "Q")
guides(
y = guide_axis_manual(labels = ref_labels[q,"left"]),
y.sec = guide_axis_manual(labels = ref_labels[q,"right"])
)} +
theme_minimal() +
labs (
caption = column
) + easy_remove_legend()
}
As we argue in our manuscript, we understand that an individual’s response to a visualization (both inferences about data, as well as any other behaviours) will vary based on properties of: (1) the visualization, (2) the data, (3) the individual, and (4) the situational context. Thus, our survey is not designed to uncover consistencies in behaviour, but rather, explore the nature of variance in behaviour as a function of the individual and visualization. For this reason, we do not expect to see any systematic relationships between survey variables.
(n = 318 ) survey respondents answered questions about some subset of the stimuli, (common stimulus B0-0 and 4 additional images defined as a block), yielding (o = 1590) stimulus-level observations.
df <- df_participants
## FOR DESCRIPTIVES PARAGRAPH
# #PROLIFIC
df.p <- df %>% filter(Distribution == "PROLIFIC")
desc.gender.p <- table(df.p$D_gender) %>% prop.table()
names(desc.gender.p) <- levels(df.p$D_gender)
p_participants <- nrow(df.p)
# #TUMBLR
df.t <- df %>% filter(Distribution == "TUMBLR")
desc.gender.t <- table(df.t$D_gender) %>% prop.table()
names(desc.gender.t) <- levels(df.t$D_gender)
t_participants <- nrow(df.t)
For study 2, a total of 318 participants were recruited from US-located English speaking users of TUMBLR (n = 78) and PROLIFIC (n = 240).
240 individuals from PROLIFIC participated in Study 2, ( 54% Female, 42% Male, 3% Non-binary, 1% Other).
78 individuals from Tumblr participated in Study 2, ( 36% Female, 5% Male, 40% Non-binary, 19% Other). Note that a higher proportion of participants recruited from TUMBLR report identities other than cis-gender Female and cis-gender Male.
df <- df_participants
## for descriptives paragraph
p.desc.duration <- psych::describe(df %>% filter(Distribution=="PROLIFIC") %>% pull(duration.min))
t.desc.duration <- psych::describe(df %>% filter(Distribution=="TUMBLR") %>% pull(duration.min))
PROLIFIC SAMPLE (n = 240 ) participant response times ranged from 13.97 to 216.18 minutes, with a mean response time of 42.49 minutes, SD = 21.15.
TUMBLR SAMPLE (n = 78 ) participant response times ranged from 10.88 to 227.57 minutes, with a mean response time of 51.93 minutes, SD = 35.47.
rm(df, df.p, df.t, p.desc.duration, t.desc.duration, desc.gender.p, desc.gender.t, p_participants, t_participants)
#full stimulus-level data
df_full <- df_graphs %>%
mutate(
STUDY = "" #dummy variable for univariate visualizations
)
# %>%
# mutate(MAKER_ID = fct_rev(MAKER_ID))
When asking participants to identify the type, age and gender of the maker of a visualization, we also asked participants to indicate their confidence in these choices.
Across all participants and all stimuli, are these (categorical) questions answered with the same degree of confidence? Here we examine both the central tendency (mean) and shape of the distribution for each confidence variable.
df <- df_full %>% select(PID, Distribution, STIMULUS,MAKER_CONF, AGE_CONF, GENDER_CONF, TOOL_CONF) %>%
pivot_longer(
cols = c(MAKER_CONF, AGE_CONF, GENDER_CONF, TOOL_CONF),
names_to = "QUESTION",
values_to = "CONFIDENCE"
) %>%
mutate(
QUESTION = factor(QUESTION, levels=c("MAKER_CONF","AGE_CONF","GENDER_CONF","TOOL_CONF" ) )
)
## B
## CONFIDENCE ACROSS QUESTIONS (all stimuli, all Pps)
## BOXPLOT W/ JITTER
B <- df %>% ggplot(aes(x=QUESTION, y= CONFIDENCE)) +
geom_boxplot(width = 0.5) +
geom_jitter(alpha = 0.25, position=position_dodge2(width = 0.25)) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+0.5, hjust = -1.25, aes( label=round(..y.., digits=0)))+
stat_summary(fun=mean, geom="point", size = 4, colour="blue")+
theme_minimal() +
labs(title = "Confidence by Survey Question", caption = "(mean in blue)")
## R
## CONFIDENCE ACROSS QUESTIONS (all stimuli, all Pps)
## RIDGEPLOT W/ INTERVAL MEAN
R <- df %>%
ggplot(aes(x=CONFIDENCE, y=fct_rev(QUESTION), fill=fct_rev(QUESTION))) +
geom_density_ridges(scale = 0.65, alpha = 0.75, quantile_lines = TRUE) +
scale_x_continuous(limits = c(0,100))+
scale_fill_manual(values = my_palettes(name="questions", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
stat_pointinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0.50, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
theme_minimal() +
labs(title = "Confidence by Survey Question", y = "QUESTION", caption =" (mean in blue)") +
easy_remove_legend()
(B+R)
## Picking joint bandwidth of 4.54
Aggregated across all participants and all stimuli, the average confidence scores for each question (maker id, age, gender, tool id) are similar, with slighly lower confidence for the GENDER question. This tells us there is enough variance in response to each question for the measure to be meaningful, and so we will follow up by investigating confidence at the STIMULUS level.
Participants were asked:
Who do you think is most likely responsible for having this
image created?
options: [business or corporation / journalist or news
outlet / educational or academic institution / government or political
organization / other organization / an individual] (select
one)
The response is stored as MAKER_ID
Participants were also asked: Please rate your confidence in
this choice. The response is stored as MAKER_CONF
.
#FILTER DATASET
df <- df_full
## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_ID = fct_rev(MAKER_ID) )
S <- ggbarstats( data = dx, x = MAKER_ID, y = STUDY,
legend.title = "MAKER ID") +
scale_fill_manual(values = my_palettes(name="reds", direction = "1")) +
theme_minimal() +
labs( title = "", x = "", y="") +
theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <- df %>%
group_by(MAKER_ID) %>%
mutate(count = n(), m = mean(MAKER_CONF)) %>%
ggplot(aes(y = MAKER_CONF, x = fct_rev(MAKER_ID), color = fct_rev(MAKER_ID))) +
geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_ID))) +
# geom_boxplot(width = 0.6)+
# geom_jitter(position=position_jitterdodge(width ), alpha = 0.2) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
scale_color_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA,
aes(fill = fct_rev(MAKER_ID)) , color="black", point_interval = "mean_qi") +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), guide = guide_legend(reverse = TRUE)) +
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
geom_text(aes(label= paste0("n=",count) , y = 5), color = "black",
size = 3, nudge_x=0.35) +
labs(y="Maker ID Confidence", x="") +
theme_minimal() +
easy_remove_legend()+
coord_flip()
##############################
(p <- (S + H)) + plot_annotation(
title = "Maker ID and Confidence",
# subtitle = "the categories of MAKER ID were chosen in similar proportion,
# and both the mean (in blue) and shape of distribution of confidence scores is similar across values of Maker ID",
caption = "(blue indicates mean)"
)
The distribution of maker types is surprisingly equal across levels
of the maker_id variable… exception of ‘organization’. This
distribution is likely a function of the diversity of stimuli we
selected. Notably, the confidence scores are similar (both in mean and
shape of distribution) regardless of the maker_id, indicating that in
general, there is no maker identification for which participants have
less confidence.
Participants were asked:
Take a moment to imagine the person(s) responsible for creating
the image. What generation are they most likely
from?
options: [boomers (60+ years old) / Generation X (44-59
years old) / Millennials (28-43 years old) / Generation Z (12 - 27 years
old] (select one)
The response was saved as MAKER_AGE
Participants were asked: Please rate your confidence in this
choice. The response is stored as AGE_CONF .
#FILTER DATASET
df <- df_full
## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_AGE = fct_rev(MAKER_AGE) )
S <- ggbarstats( data = dx, x = MAKER_AGE, y = STUDY,
legend.title = "MAKER AGE") +
scale_fill_manual(values = my_palettes(name="lightblues", direction = "1")) +
theme_minimal() +
labs( title = "", x = "", y="") +
theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <- df %>%
group_by(MAKER_AGE) %>%
mutate(count = n(), m = mean(MAKER_CONF)) %>%
ggplot(aes(y = MAKER_CONF, x = fct_rev(MAKER_AGE), color = fct_rev(MAKER_AGE))) +
geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_AGE))) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
scale_color_manual(values = my_palettes(name="lightblues", direction = "-1"),
guide = guide_legend(reverse = TRUE)) +
scale_fill_manual(values = my_palettes(name="lightblues", direction = "-1"),
guide = guide_legend(reverse = TRUE)) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA,
aes(fill = fct_rev(MAKER_AGE)) , color="black", point_interval = "mean_qi") +
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
geom_text(aes(label= paste0("n=",count) , y = 5), color = "black",
size = 3, nudge_x=0.35) +
labs(y="Maker Age Confidence", x="") +
theme_minimal() +
easy_remove_legend()+
coord_flip()
##############################
(p <- (S + H)) + plot_annotation(
title = "Maker AGE and Confidence",
# subtitle = "The value
# distribution of confidence scores is similar across values of Maker AGE",
caption = "(blue indicates mean)"
)
The distribution of maker ages is distributed as we would expect if participants are answering the question with some sense of the maker’s occupation in mind, and thus answering with the generations that are mostly likely of working age (gen X, millenial). As with maker_id, confidence scores are similar (both in mean and shape of distribution) across all levels of maker_age, indicating that in general, there is no maker age for which participants have less confidence.
Participants were asked:
Take a moment to imagine the person(s) responsible for creating
the image. What gender do they most likely identify
with?
options: [female / male / other ] (select
one)
Responses were stored as MAKER_GENDER.
Participants were asked: Please rate your confidence in this
choice. The response is stored as GENDER_CONF
.
#FILTER DATASET
df <- df_full
## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
dx <- df %>% mutate( MAKER_AGE = fct_rev(MAKER_AGE) )
S <- ggbarstats( data = dx, x = MAKER_GENDER, y = STUDY,
legend.title = "MAKER GENDER") +
scale_fill_manual(values = my_palettes(name="smallgreens", direction = "1")) +
theme_minimal() +
labs( title = "", x = "", y="") +
theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <- df %>%
group_by(MAKER_GENDER) %>%
mutate(count = n(), m = mean(GENDER_CONF)) %>%
ggplot(aes(y = GENDER_CONF, x = fct_rev(MAKER_GENDER), color = fct_rev(MAKER_GENDER))) +
geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(MAKER_GENDER))) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
scale_color_manual(values = my_palettes(name="smallgreens", direction = "-1"),
guide = guide_legend(reverse = TRUE)) +
scale_fill_manual(values = my_palettes(name="smallgreens", direction = "-1"),
guide = guide_legend(reverse = TRUE)) +
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA,
aes(fill = fct_rev(MAKER_GENDER)) , color="black", point_interval = "mean_qi") +
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
geom_text(aes(label= paste0("n=",count) , y = 5), color = "black",
size = 3, nudge_x=0.35) +
labs(y="Maker Gender Confidence", x="") +
theme_minimal() +
easy_remove_legend()+
coord_flip()
##############################
(p <- (S + H)) + plot_annotation(
title = "Maker GENDER and Confidence",
# subtitle = "The value
# distribution of confidence scores is similar across values of Maker AGE",
caption = "(blue indicates mean)"
)
The distribution of maker genders is not evenly distributed between
men and women as we might expect. We think it is most likely that the
‘male’ category serves as a default value for the maker gender, in the
absence of any particular feature of stimulus that viewers interpret as
strongly feminine. This hypothesis is grounded in the free response
data, however it is only a hypothesis.
Participants were asked: What tools do you think were most
likely used to create this image?
options: [basic graphic design software (e.g. Canva, or
similar) / advanced graphic design software (e.g. Adobe Illustrator,
Figma, or similar) / data visualization software (e.g. Tableau, PowerBI,
or similar)/ general purpose software (e.g. MS Word/Excel, Google
Sheets, or similar) / programming language (e.g. R, python, javascript,
or similar) ] (select all that apply)
The response was saved as variable TOOL_ID
(multi-select)
Participants were asked: Please rate your confidence in this
choice. The response is stored as TOOL_CONF .
#FILTER DATASET
df <- df_tools %>%
mutate(
STUDY = ""
)
## D
## MAKER IDENTIFICATION AGGREGATED (all)
## GGSTATSPLOT
##############################
#hack for consistent ordering of ggstats bar plot
S <- ggbarstats( data = df, x = TOOL_ID, y = STUDY,
legend.title = "TOOL ID") +
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
theme_minimal() +
labs( title = "", x = "", y="") +
theme(aspect.ratio = 1)
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
##############################
## H
## HALF BOXPLOT + DOTPLOT + MEAN
##############################
H <- df %>%
group_by(TOOL_ID) %>%
mutate(count = n(), m = mean(TOOL_CONF)) %>%
ggplot(aes(y = TOOL_CONF, x = fct_rev(TOOL_ID), color = fct_rev(TOOL_ID))) +
geom_half_boxplot(side="r", alpha = 0.5, aes(fill=fct_rev(TOOL_ID))) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=-0.75, hjust = 1, aes( label=round(..y.., digits=0)))+
scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_color_paletteer_d("awtools::a_palette", direction = 1)+
stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA,
aes(fill = fct_rev(TOOL_ID)) , color="black", point_interval = "mean_qi") +
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
geom_text(aes(label= paste0("n=",count) , y = 5), color = "black",
size = 3, nudge_x=0.35) +
labs(y="Tool ID Confidence", x="") +
theme_minimal() +
easy_remove_legend()+
coord_flip()
##############################
(p <- (S + H)) + plot_annotation(
title = "TOOL ID and Confidence",
# subtitle = "The value
# distribution of confidence scores is similar across values of Maker AGE",
caption = "(blue indicates mean)"
)
We had no expectations with respect to the distribution of values in tool identification, but note that are roughly even across categories (exception of ‘unknown’ and ‘programming’), and the confidence scores are similar.
The first question each participant saw in each stimulus block was:
As you’re scrolling through your feed, you see this image. What
would you do? options: keep scrolling, pause and look at the
image The response was saved as variable ENCOUNTER (select
one)
## B
## ENCOUNTER BY STIMULUS
## GGSTATSPLOT
df_full %>%
ggbarstats(
x = ENCOUNTER, y = STUDY,
legend.title = "ENCOUNTER",
results.subtitle = FALSE) +
scale_fill_manual(values = my_palettes(name="encounter", direction = "-1"))+
theme_minimal() +
labs( title = "ENCOUNTER Choice ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
Participants chose to ‘engage’ rather than ‘scroll past’ 59% of the time.
The last question participants were asked in each stimulus block was:
Imagine you encounter the following image while scrolling. Which
of the following are you most likely to do? options: post a
comment, share/repost, share/repost WITH comment, look up more
information about the topic or source, unfollow/block the source,
NOTHING—just keep scrolling The response was saved as variable
CHART_ACTION (multi-select)
## B
## ACTION BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
df_actions %>% mutate(
CHART_ACTION = fct_rev(CHART_ACTION),
STUDY="") %>%
ggbarstats( x = CHART_ACTION, y = STUDY,
legend.title = "CHART ACTION",
results.subtitle = FALSE) +
# scale_fill_paletteer_d("awtools::a_palette", direction = 1)+
scale_fill_manual(values = my_palettes(name="actions", direction = "1"))+
theme_minimal() +
labs( title = "ACTION Choice ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
A high proportion of participants answered ‘nothing’ chart action, which is not surprising given the social media context. I am surprised to see such a high proportion answering that they would seek further information!
Before starting the experimental blocks, participants were asked
Please choose a social media platform to imagine you are
engaging with during this study options: Twitter/X, Tumblr,
LinkedIn, Instagram, Facebook The response was saved as variable
PLATFORM (select one)
## B
## PLATFORM BY STIMULUS
## GGSTATSPLOT
#hack for consistent ordering of ggstats bar plot
df_full %>%
ggbarstats(
x = PLATFORM, y = STUDY,
legend.title = "PLATFORM",
results.subtitle = FALSE) +
scale_fill_manual(values = my_palettes(name="platforms", direction = "-1"))+
theme_minimal() +
labs( title = "PLATFORM Choice ", subtitle = "", x = "")
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.
We had no expectations about the distribution of social media platform.
The SD scores visualized here are in the same form as the participants’ resposne scale (slider from 0-100).
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
#################### ALL QUESTIONS across ALL STIMULUS #############################################################
#### LIST OF BLOXPLOTS + JITTER #############################################################################
# setup dataframe
df <- df_graphs
#subset data cols
cols <- df %>% select( all_of(ref_sd_questions))
plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", mean=TRUE, facet = FALSE, boxplot=TRUE))
#aggregate q plots into one for stimulus
plot_master_questions <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
plot_annotation(
title = "ALL STIMULI",
subtitle =""
)
ggsave(plot = plot_master_questions, path="figs/level_aggregated/distributions", filename =paste0("combined_stimuli","_box.png"), units = c("in"), width = 10, height = 14 )
#### GGDIST PLOT#############################################################################
# setup dataframe
df <- df_sd_questions_long %>% select(1:8, QUESTION, STIMULUS_CATEGORY, value)
d <- left_join( x = df, y = ref_labels,
by = c("QUESTION" = "ref_sd_questions")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions))
# GGDIST HALFEYE (raincloud doesn't work b/c long tails)
(g <- ggplot(d, aes(y = fct_rev(QUESTION), x = value, fill=category)) +
stat_halfeye(scale=1, density="bounded", point_interval = "mean_qi") +
scale_fill_manual(values = my_palettes(name="greys", direction = "1"))+
# stat_interval(side = "bottom", scale = 2, slab_linewidth = NA) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0.50, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
guides(
y = guide_axis_manual(labels = rev(ref_labels$left), title = ""),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
cowplot::draw_text(text = ref_sd_questions, x = 90, y= ref_sd_questions,size = 10, vjust=-2) +
labs (title = "ALL STIMULI", y = "") +
theme_minimal() + easy_remove_legend()
)
ggsave(plot = g, path="figs/level_aggregated/distributions", filename =paste0("combined_stimuli","_ggdist.png"), units = c("in"), width = 10, height = 14 )
#### DENSITY RIDGES#############################################################################
# setup dataframe
df <- df_sd_questions_long %>% select(1:8, QUESTION, value)
d <- left_join( x = df, y = ref_labels,
by = c("QUESTION" = "ref_sd_questions")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions))
( x <-ggplot(d, aes(x = value, y = fct_rev(QUESTION), fill = category)) +
geom_density_ridges(scale = 0.9,quantile_lines = TRUE, alpha = 0.75) +
# scale_fill_manual(values = my_palettes(name="amy_gradient", direction = "1"))+
scale_fill_manual(values = my_palettes(name="greys", direction = "1"))+
guides(
y = guide_axis_manual(labels = rev(ref_labels$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
labs(title = "ALL STIMULI", y = "") +
cowplot::draw_text(text = ref_sd_questions, x = 100, y= ref_sd_questions,size = 10, vjust=-2) + ##raw
# cowplot::draw_text(text = ref_sd_questions, x = -4, y= ref_sd_questions,size = 10, vjust=-2) + ##z-score
theme_minimal() + easy_remove_legend()
)
ggsave(plot = x, path="figs/level_aggregated/distributions", filename =paste0("combined_stimuli","_ridges.png"), units = c("in"), width = 10, height = 14 )
#### GROUPED DENSITY RIDGES#############################################################################
# setup dataframe
df <- df_sd_questions_long %>% select(1:8, QUESTION, STIMULUS_CATEGORY, value)
d <- left_join( x = df, y = ref_labels,
by = c("QUESTION" = "ref_sd_questions")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions),
STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY, levels = c("A","B","C","D","F")))
( c <-ggplot(d, aes(x = value, y = fct_rev(QUESTION), fill=STIMULUS_CATEGORY))+
geom_density_ridges(scale = 0.75, alpha = 0.5, panel_scaling = TRUE) +
facet_grid2(.~STIMULUS_CATEGORY)+
# geom_density_ridges(scale = 1, quantile_lines = TRUE, alpha = 0.25)
guides(
y = guide_axis_manual(labels = rev(ref_labels$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
labs(title = "by STIMULUS CATEGORY", y = "") +
cowplot::draw_text(text = ref_sd_questions, x = 40, y= ref_sd_questions,size = 10, vjust=2) + ##raw
# # cowplot::draw_text(text = ref_sd_questions, x = -4, y= ref_sd_questions,size = 10, vjust=-2) + ##z-score
theme_minimal() + easy_remove_legend()
)
ggsave(plot = c, path="figs/level_aggregated/distributions", filename =paste0("combined_by_category","_ridges.png"), units = c("in"), width = 10, height = 14 )
}
## Picking joint bandwidth of 4.51
## Picking joint bandwidth of 5.16
## Picking joint bandwidth of 5.32
## Picking joint bandwidth of 7.21
## Picking joint bandwidth of 6.27
## Picking joint bandwidth of 6.14
g
x
## Picking joint bandwidth of 4.51
c
## Picking joint bandwidth of 5.16
## Picking joint bandwidth of 5.32
## Picking joint bandwidth of 7.21
## Picking joint bandwidth of 6.27
## Picking joint bandwidth of 6.14
rm(df, df_full, S,H, B,g)
Here the scale of the semantic differential questions have been collapsed, such that 0 is the midpoint of the scale (indicating uncertainty, or not strongly indicating either of the labelled traits) and both 100 and 0 are 50 (indicating a strong signal toward either of the labelled traits).
##ONLY RENDER GRAPHS IF SET TO TRUE
if(graph_render == TRUE){
#################### ALL QUESTIONS across ALL STIMULUS #############################################################
#### LIST OF BLOXPLOTS + JITTER #############################################################################
# setup dataframe
df <- df_graphs_abs
#subset data cols
cols <- df %>% select( all_of(ref_sd_questions_abs))
plots <- as.list(lapply(colnames(cols), plot_sd, data = df, type ="S", mean=TRUE, facet = FALSE, boxplot=TRUE))
#aggregate q plots into one for stimulus
plot_master_questions <- plots[[1]] / plots[[2]] / plots[[3]] / plots[[4]] / plots[[5]] / plots[[6]] / plots[[7]] /
plots[[8]] /plots[[9]] /plots[[10]] /plots[[11]] +
plot_annotation(
title = "ALL STIMULI",
subtitle =""
)
ggsave(plot = plot_master_questions, path="figs/level_aggregated/distributions", filename =paste0("ABS_combined_stimuli","_box.png"), units = c("in"), width = 10, height = 14 )
#### GGDIST PLOT#############################################################################
# setup dataframe
df <- df_sd_questions_long_abs %>% select(1:8, QUESTION, STIMULUS_CATEGORY, value)
d <- left_join( x = df, y = ref_labels_abs,
by = c("QUESTION" = "ref_sd_questions_abs")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions))
# GGDIST HALFEYE (raincloud doesn't work b/c long tails)
( g <- ggplot(d, aes(y = fct_rev(QUESTION), x = value, fill=category)) +
stat_halfeye(scale=1, density="bounded", point_interval = "mean_qi") +
scale_fill_manual(values = my_palettes(name="greys", direction = "1"))+
# stat_interval(side = "bottom", scale = 2, slab_linewidth = NA) +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0.50, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
guides(
y = guide_axis_manual(labels = rev(ref_labels$left), title = ""),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
cowplot::draw_text(text = ref_sd_questions, x = 45, y= ref_sd_questions_abs,size = 10, vjust=-2) +
labs (title = "ALL STIMULI", y = "") +
theme_minimal() + easy_remove_legend()
)
ggsave(plot = g, path="figs/level_aggregated/distributions", filename =paste0("ABS_combined_stimuli","_ggdist.png"), units = c("in"), width = 10, height = 14 )
#### DENSITY RIDGES#############################################################################
# setup dataframe
df <- df_sd_questions_long_abs %>% select(1:8, QUESTION, value)
d <- left_join( x = df, y = ref_labels_abs,
by = c("QUESTION" = "ref_sd_questions_abs")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions))
( x <-ggplot(d, aes(x = value, y = fct_rev(QUESTION), fill = category)) +
geom_density_ridges(scale = 0.9,quantile_lines = TRUE, alpha = 0.75) +
# scale_fill_manual(values = my_palettes(name="amy_gradient", direction = "1"))+
scale_fill_manual(values = my_palettes(name="greys", direction = "1"))+
guides(
y = guide_axis_manual(labels = rev(ref_labels$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels$right))
) +
labs(title = "ALL STIMULI", y = "") +
cowplot::draw_text(text = ref_sd_questions, x = 45, y= ref_sd_questions_abs,size = 10, vjust=-3) + ##raw
# cowplot::draw_text(text = ref_sd_questions, x = -4, y= ref_sd_questions,size = 10, vjust=-2) + ##z-score
theme_minimal() + easy_remove_legend()
)
ggsave(plot = x, path="figs/level_aggregated/distributions", filename =paste0("ABS_combined_stimuli","_ridges.png"), units = c("in"), width = 10, height = 14 )
#### GROUPED DENSITY RIDGES#############################################################################
# setup dataframe
df <- df_sd_questions_long_abs %>% select(1:8, QUESTION, STIMULUS_CATEGORY, value)
d <- left_join( x = df, y = ref_labels_abs,
by = c("QUESTION" = "ref_sd_questions_abs")) %>%
mutate(
category=factor(category, levels=c("COMPETENCY","MAKER","CHART")),
QUESTION = factor(QUESTION, levels=ref_sd_questions),
STIMULUS_CATEGORY = factor(STIMULUS_CATEGORY, levels = c("A","B","C","D","F")))
( c <-ggplot(d, aes(x = value, y = fct_rev(QUESTION), fill=STIMULUS_CATEGORY))+
geom_density_ridges(scale = 0.75, alpha = 0.5, panel_scaling = TRUE) +
facet_grid2(.~STIMULUS_CATEGORY)+
# geom_density_ridges(scale = 1, quantile_lines = TRUE, alpha = 0.25)
guides(
y = guide_axis_manual(labels = rev(ref_labels_abs$left)),
y.sec = guide_axis_manual(labels = rev(ref_labels_abs$right))
) +
labs(title = "by STIMULUS CATEGORY", y = "") +
cowplot::draw_text(text = ref_sd_questions_abs, x = 20, y= ref_sd_questions_abs,size = 10, vjust=2) + ##raw
theme_minimal() + easy_remove_legend()
)
ggplot2::ggsave(plot = c, path="figs/level_aggregated/distributions", filename =paste0("ABS_combined_by_category","_ridges.png"), units = c("in"), width = 10, height = 14 )
}
## Picking joint bandwidth of 2.9
## Picking joint bandwidth of 3.78
## Picking joint bandwidth of 3.72
## Picking joint bandwidth of 4.28
## Picking joint bandwidth of 3.97
## Picking joint bandwidth of 3.84
g
x
## Picking joint bandwidth of 2.9
c
## Picking joint bandwidth of 3.78
## Picking joint bandwidth of 3.72
## Picking joint bandwidth of 4.28
## Picking joint bandwidth of 3.97
## Picking joint bandwidth of 3.84
rm(df, df_full, S,H, B,g,c)
df <- df_graphs %>% select(
MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE,
MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID)
print("FULL CORRELATION NO RANDOM EFFECT")
## [1] "FULL CORRELATION NO RANDOM EFFECT"
## CALCULATE full correlations with no random effects
c <- df %>% correlation(partial=FALSE, include_factors=FALSE)
(s <- c %>% summary(redundant = FALSE))
## # Correlation Matrix (pearson-method)
##
## Parameter | CHART_BEAUTY | CHART_LIKE | CHART_INTENT | CHART_TRUST | MAKER_TRUST | MAKER_ALIGN | MAKER_SELF | MAKER_ARGUE | MAKER_POLITIC | MAKER_DATA
## ----------------------------------------------------------------------------------------------------------------------------------------------------------
## MAKER_DESIGN | -0.40*** | -0.34*** | -0.03 | -0.19*** | -0.16*** | -0.09** | 0.09** | -0.02 | 0.06 | 0.39***
## MAKER_DATA | -0.20*** | -0.25*** | 0.32*** | -0.39*** | -0.35*** | -0.15*** | 0.11*** | -0.12*** | 0.02 |
## MAKER_POLITIC | -0.17*** | -0.22*** | 0.11*** | -0.20*** | -0.32*** | -0.47*** | 0.50*** | -0.31*** | |
## MAKER_ARGUE | 0.25*** | 0.30*** | -0.31*** | 0.40*** | 0.49*** | 0.40*** | -0.47*** | | |
## MAKER_SELF | -0.34*** | -0.42*** | 0.30*** | -0.46*** | -0.58*** | -0.67*** | | | |
## MAKER_ALIGN | 0.38*** | 0.47*** | -0.27*** | 0.50*** | 0.62*** | | | | |
## MAKER_TRUST | 0.36*** | 0.49*** | -0.43*** | 0.71*** | | | | | |
## CHART_TRUST | 0.48*** | 0.60*** | -0.48*** | | | | | | |
## CHART_INTENT | -0.11*** | -0.20*** | | | | | | | |
## CHART_LIKE | 0.83*** | | | | | | | | |
##
## p-value adjustment method: Holm (1979)
plot(s, show_data="point") + labs(title = "Correlation Matrix",
subtitle="(full correlation; pearson method; Holm p-value adjustment)") + theme_minimal()
print("PARTIAL CORRELATION WITH PID AS RANDOM EFFECT")
## [1] "PARTIAL CORRELATION WITH PID AS RANDOM EFFECT"
#CALCULATE partial correlations with PID as random effect
## (this isolates correlation pairwise factoring out other variables)
c <- df %>% correlation(partial=TRUE,multilevel = TRUE)
(s <- c %>% summary(redundant = FALSE ))
## # Correlation Matrix (pearson-method)
##
## Parameter | CHART_BEAUTY | CHART_LIKE | CHART_INTENT | CHART_TRUST | MAKER_TRUST | MAKER_ALIGN | MAKER_SELF | MAKER_ARGUE | MAKER_POLITIC | MAKER_DATA
## ----------------------------------------------------------------------------------------------------------------------------------------------------------
## MAKER_DESIGN | -0.26*** | 8.55e-03 | -0.16*** | 0.04 | -0.04 | 0.07 | 0.01 | 0.08* | 0.04 | 0.35***
## MAKER_DATA | 0.08* | -0.04 | 0.20*** | -0.15*** | -0.13*** | 3.78e-03 | -0.13*** | 0.01 | -0.06 |
## MAKER_POLITIC | 0.02 | -6.67e-03 | -0.06 | 0.06 | -0.05 | -0.23*** | 0.28*** | -0.11*** | |
## MAKER_ARGUE | 0.07 | -0.03 | -0.11*** | 0.03 | 0.16*** | 7.90e-03 | -0.17*** | | |
## MAKER_SELF | -0.03 | -0.04 | 0.07 | -2.17e-03 | -0.16*** | -0.36*** | | | |
## MAKER_ALIGN | 3.74e-03 | 0.10** | 0.04 | 0.04 | 0.25*** | | | | |
## MAKER_TRUST | -0.08* | 0.05 | -0.10** | 0.39*** | | | | | |
## CHART_TRUST | 0.04 | 0.23*** | -0.27*** | | | | | | |
## CHART_INTENT | 0.05 | 0.03 | | | | | | | |
## CHART_LIKE | 0.74*** | | | | | | | | |
##
## p-value adjustment method: Holm (1979)
###### VIS WITH CORRELATION PACKAGE
#SEE [correlation] PLOT
g <- plot(s, show_data = "point", show_text = "label",
stars=TRUE, show_legend=FALSE,
show_statistic = FALSE, show_ci = FALSE) +
theme_minimal()+
labs(title = "Correlation Matrix — SD Questions",
subtitle="(partial correlation; pearson method; Holm p-value adjustment; participant as random effect)")
# text = list(fontface = "italic")
g
ggsave(g, scale =1, filename = "figs/level_aggregated/heatmaps/partial_correlation_all.png", width = 14, height = 6, dpi = 320, limitsize = FALSE)
#PLOT GAUSSIAN GRAPH MODEL
# plot(c)
###### VIS WITH CORRPLOT <- -- customizable but can't save to file ARGH
## GET THE MATRIX
m <- as.matrix(c)
## JUST CIRCLES
corrplot(m, method = 'circle', type = 'lower',
order = 'original', diag = FALSE, addCoef.col = "#7A7A7A",
tl.col = "black")
These plots depict the PARTIAL CORRELATION pairwise between variables (partial correlation factors out influence of other variables), with participant ID as a random effect. The resulting values are pearson moment-correlation coefficients ranging of -1 (direct negative) to +1 direct positive correlation. These correlations are calculated on the full scale semantic differential questions (i.e. with the 0 - 100 range, where 1 and 100 are end points and 50 is the central point)
df <- df_graphs_abs %>% select(
MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE,
MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID)
print("FULL CORRELATION NO RANDOM EFFECT")
## [1] "FULL CORRELATION NO RANDOM EFFECT"
## CALCULATE full correlations with no random effects
c <- df %>% correlation(partial=FALSE, include_factors=FALSE)
(s <- c %>% summary(redundant = FALSE))
## # Correlation Matrix (pearson-method)
##
## Parameter | CHART_BEAUTY | CHART_LIKE | CHART_INTENT | CHART_TRUST | MAKER_TRUST | MAKER_ALIGN | MAKER_SELF | MAKER_ARGUE | MAKER_POLITIC | MAKER_DATA
## ----------------------------------------------------------------------------------------------------------------------------------------------------------
## MAKER_DESIGN | 0.24*** | 0.25*** | 0.13*** | 0.19*** | 0.16*** | 0.13*** | 0.15*** | 0.17*** | 0.11*** | 0.40***
## MAKER_DATA | 0.18*** | 0.19*** | 0.27*** | 0.25*** | 0.20*** | 0.10*** | 0.15*** | 0.18*** | 0.04 |
## MAKER_POLITIC | 0.14*** | 0.19*** | 0.08** | 0.24*** | 0.30*** | 0.58*** | 0.52*** | 0.44*** | |
## MAKER_ARGUE | 0.15*** | 0.19*** | 0.23*** | 0.32*** | 0.44*** | 0.48*** | 0.54*** | | |
## MAKER_SELF | 0.18*** | 0.24*** | 0.20*** | 0.32*** | 0.49*** | 0.63*** | | | |
## MAKER_ALIGN | 0.21*** | 0.28*** | 0.20*** | 0.39*** | 0.52*** | | | | |
## MAKER_TRUST | 0.15*** | 0.24*** | 0.29*** | 0.58*** | | | | | |
## CHART_TRUST | 0.34*** | 0.45*** | 0.37*** | | | | | | |
## CHART_INTENT | 0.19*** | 0.21*** | | | | | | | |
## CHART_LIKE | 0.68*** | | | | | | | | |
##
## p-value adjustment method: Holm (1979)
plot(s, show_data="point") + labs(title = "Correlation Matrix",
subtitle="(full correlation; pearson method; Holm p-value adjustment)") + theme_minimal()
print("PARTIAL CORRELATION WITH PID AS RANDOM EFFECT")
## [1] "PARTIAL CORRELATION WITH PID AS RANDOM EFFECT"
#CALCULATE partial correlations with PID as random effect
## (this isolates correlation pairwise factoring out other variables)
c <- df %>% correlation(partial=TRUE, multilevel = TRUE)
(s <- c %>% summary(redundant = FALSE ))
## # Correlation Matrix (pearson-method)
##
## Parameter | CHART_BEAUTY | CHART_LIKE | CHART_INTENT | CHART_TRUST | MAKER_TRUST | MAKER_ALIGN | MAKER_SELF | MAKER_ARGUE | MAKER_POLITIC | MAKER_DATA
## ----------------------------------------------------------------------------------------------------------------------------------------------------------
## MAKER_DESIGN | 0.08 | 0.07 | -0.06 | -2.84e-03 | 0.02 | -0.02 | 0.03 | 0.03 | 0.03 | 0.31***
## MAKER_DATA | 0.02 | -1.43e-03 | 0.15*** | 0.07 | 0.05 | -0.07 | 0.03 | 0.05 | -0.04 |
## MAKER_POLITIC | -0.02 | 0.03 | -0.07 | 0.01 | -0.08 | 0.33*** | 0.21*** | 0.19*** | |
## MAKER_ARGUE | 0.03 | -0.02 | 0.06 | 0.03 | 0.13*** | 0.07 | 0.22*** | | |
## MAKER_SELF | -9.25e-03 | 0.04 | 0.01 | -0.07 | 0.16*** | 0.32*** | | | |
## MAKER_ALIGN | 0.02 | 0.06 | 0.03 | 0.05 | 0.22*** | | | | |
## MAKER_TRUST | -0.08* | -0.02 | 0.07 | 0.39*** | | | | | |
## CHART_TRUST | 0.07 | 0.22*** | 0.20*** | | | | | | |
## CHART_INTENT | -9.03e-03 | 5.23e-03 | | | | | | | |
## CHART_LIKE | 0.61*** | | | | | | | | |
##
## p-value adjustment method: Holm (1979)
###### VIS WITH CORRELATION PACKAGE
#SEE [correlation] PLOT
g <- plot(s, show_data = "point", show_text = "label",
stars=TRUE, show_legend=FALSE,
show_statistic = FALSE, show_ci = FALSE) +
theme_minimal()+
labs(title = "Correlation Matrix — SD Questions — absolute values",
subtitle="(partial correlation; pearson method; Holm p-value adjustment; participant as random effect)")
# text = list(fontface = "italic")
g
ggsave(g, scale =1, filename = "figs/level_aggregated/heatmaps/partial_correlation_abs.png", width = 14, height = 6, dpi = 320, limitsize = FALSE)
#PLOT GAUSSIAN GRAPH MODEL
# plot(c)
###### VIS WITH CORRPLOT <- -- customizable but can't save to file ARGH
## GET THE MATRIX
m <- as.matrix(c)
## JUST CIRCLES
corrplot(m, method = 'circle', type = 'lower',
order = 'original', diag = FALSE, addCoef.col = "#7A7A7A",
tl.col = "black")
These plots depict the PARTIAL CORRELATION pairwise between variables (partial correlation factors out influence of other variables), with participant ID as a random effect. The resulting values are pearson moment-correlation coefficients ranging of -1 (direct negative) to +1 direct positive correlation. These correlations are calculated on the ABSOLUTE VALUE of the semantic differential questions (i.e. with the full scale folded in half, such that 50 now becomes 0, and the extrememe values (0, 100) become 50). The absolute value scale allows us to collapse for weak (near zero) vs. strong (near 50) signal in each variable.
Here we explore the distribution of each SD variable (e.g. MAKER TRUST) by the different values of each categorical variable (e.g. MAKER ID). Patterns of interest are noted, which we explore further in the section exploratory questions.
if(graph_render){
df <- df_graphs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER ID
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_ID", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="reds", direction = "1"), name = "", guide = guide_legend(reverse = FALSE)) +
theme_minimal()
)
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_id_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further
if(graph_render){
df <- df_graphs_abs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER ID
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_ID", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="reds", direction = "1"), name = "", guide = guide_legend(reverse = FALSE)) +
theme_minimal())
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_id_corr_abs.png"), units = c("in"), width = 14, height = 10 )
x
}
if(graph_render){
df <- df_graphs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER AGE
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_AGE", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="lightblues", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal())
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_age_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further
if(graph_render){
df <- df_graphs_abs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER AGE
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_AGE", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="lightblues", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal())
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_age_corr_abs.png"), units = c("in"), width = 14, height = 10 )
x
}
if(graph_render){
df <- df_graphs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER GENDER
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_GENDER", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="smallgreens", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal() )
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_gender_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further - maker-data for FEMALE
if(graph_render){
df <- df_graphs_abs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
MAKER_ID, MAKER_AGE, MAKER_GENDER)
## CORRELATION MATRIX SPLIT BY MAKER GENDER
(x <- ggscatmat(df, columns = 1:11, color = "MAKER_GENDER", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="smallgreens", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal() )
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("maker_gender_corr_abs.png"), units = c("in"), width = 14, height = 10 )
x
}
if(graph_render){
df <- df_tools %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
TOOL_ID)
## CORRELATION MATRIX SPLIT BY TOOL ID
(x <- ggscatmat(df, columns = 1:11, color = "TOOL_ID", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="tools", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal() )
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("tool_id_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further - maker data for design-basic, interesting pattern - look closer at chart beauty - interesting pattern across values on chart intent
if(graph_render){
df <- df_graphs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
ENCOUNTER) %>%
mutate(ENCOUNTER = fct_rev(ENCOUNTER))
## CORRELATION MATRIX SPLIT BY ENCOUNTER
(x <- ggscatmat(df, columns = 1:11, color = "ENCOUNTER", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="encounter", direction = "1"))+
theme_minimal())
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("encounter_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further — no difference at ALL in maker data - interesting! - chart beauty very diff - chart intent intersting bimodal
if(graph_render){
df <- df_graphs_abs %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
ENCOUNTER) %>%
mutate(ENCOUNTER = fct_rev(ENCOUNTER))
## CORRELATION MATRIX SPLIT BY ENCOUNTER
(x <- ggscatmat(df, columns = 1:11, color = "ENCOUNTER", alpha = 0.8) +
scale_color_manual(values = my_palettes(name="encounter", direction = "1"))+
theme_minimal())
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("encounter_corr_abs.png"), units = c("in"), width = 14, height = 10 )
x
}
if(graph_render){
df <- df_actions %>% select(MAKER_DESIGN, MAKER_DATA,
MAKER_POLITIC, MAKER_ARGUE, MAKER_SELF, MAKER_ALIGN, MAKER_TRUST,
CHART_TRUST, CHART_INTENT, CHART_LIKE, CHART_BEAUTY,
PID, STIMULUS, BLOCK, STIMULUS_CATEGORY,
CHART_ACTION)
## CORRELATION MATRIX SPLIT BY CHART ACTION
(x <- ggscatmat(df, columns = 1:11, color = "CHART_ACTION", alpha = 0.2) +
scale_color_manual(values = my_palettes(name="actions", direction = "1"), name = "", guide = guide_legend(reverse = TRUE)) +
theme_minimal() )
ggsave(plot = x, path="figs/level_aggregated/pairplots", filename =paste0("chart_action_corr_sd.png"), units = c("in"), width = 14, height = 10 )
x
}
Interesting patterns to explore further - unfollow/block across all!
df <- df_graphs
## Does MAKER_DATA depend on MAKER ID?
##RIDGEPLOT w/ MEAN
answers <- levels(df$MAKER_ID)
left <- rep(ref_labels['MAKER_DATA','left'], length(levels(df$MAKER_ID)))
right <- rep(ref_labels['MAKER_DATA','right'], length(levels(df$MAKER_ID)))
df %>%
group_by(MAKER_ID) %>%
mutate(count = n()) %>%
ggplot(aes(y = fct_rev(MAKER_ID), x= MAKER_DATA, fill = fct_rev(MAKER_ID))) +
scale_x_continuous(limits = c(0,100))+
geom_density_ridges(scale = 0.55,quantile_lines = TRUE, alpha = 0.75) +
stat_pointinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
guides(
y = guide_axis_manual(labels = left, title = ""),
y.sec = guide_axis_manual(labels = right)
) +
geom_text(aes(label= paste0("n=",count) , y = MAKER_ID, x = 100), color = "black",size = 3, nudge_y = 0.25) +
cowplot::draw_text(text = toupper(answers), x = 0, y= answers, size = 10, vjust=-2, hjust=0) +
labs (title = "DATA COMPETENCY by MAKER ID", y = "", x = "MAKER DATA COMPETENCY", caption="(mean in blue)") +
theme_minimal() + easy_remove_legend()
## Picking joint bandwidth of 7.9
### LINEAR MIXED EFFECTS MODEL ##################
df <- df_graphs
## SET CONTRASTS
contrasts(df$MAKER_ID) <-car::contr.Treatment(levels(df$MAKER_ID)) # intercept first group mean; coeff dif from first
## DEFINE MODEL
f <- "MAKER_DATA ~ MAKER_ID + (1|PID) + (1|STIMULUS)"
m1 <-lmer(MAKER_DATA ~ MAKER_ID + (1|PID) + (1|STIMULUS), data=df)
## PRINT MODEL
(m_eq <- extract_eq(m1, use_coef = TRUE, ital_vars = TRUE, coef_digits = 1, wrap = TRUE, intercept = "beta"))
\[ \begin{aligned} \widehat{MAKER\_DATA}_{i} &\sim N \left(54.3_{\alpha_{j[i],k[i]}}, \sigma^2 \right) \\ \alpha_{j} &\sim N \left(-8_{\gamma_{1}^{\alpha}}(MAKER\_ID_{[T.organization]}) - 13.7_{\gamma_{2}^{\alpha}}(MAKER\_ID_{[T.news]}) - 20.1_{\gamma_{3}^{\alpha}}(MAKER\_ID_{[T.education]}) - 16.4_{\gamma_{4}^{\alpha}}(MAKER\_ID_{[T.political]}) - 13.5_{\gamma_{5}^{\alpha}}(MAKER\_ID_{[T.business]}), 9 \right) \text{, for PID j = 1,} \dots \text{,J} \\ \alpha_{k} &\sim N \left(0, 11.5 \right) \text{, for STIMULUS k = 1,} \dots \text{,K} \end{aligned} \]
## DESCRIBE MODEL
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: MAKER_DATA ~ MAKER_ID + (1 | PID) + (1 | STIMULUS)
## Data: df
##
## REML criterion at convergence: 14572.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7463 -0.6693 -0.0982 0.6322 3.2701
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 80.33 8.963
## STIMULUS (Intercept) 131.58 11.471
## Residual 482.62 21.969
## Number of obs: 1590, groups: PID, 318; STIMULUS, 25
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 54.340 3.023 60.428 17.976
## MAKER_ID[T.organization] -7.970 2.878 1523.019 -2.770
## MAKER_ID[T.news] -13.679 2.370 1527.078 -5.771
## MAKER_ID[T.education] -20.131 2.180 1504.192 -9.236
## MAKER_ID[T.political] -16.415 2.484 1546.715 -6.607
## MAKER_ID[T.business] -13.458 2.294 1538.056 -5.866
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## MAKER_ID[T.organization] 0.00567 **
## MAKER_ID[T.news] 0.0000000095283 ***
## MAKER_ID[T.education] < 0.0000000000000002 ***
## MAKER_ID[T.political] 0.0000000000537 ***
## MAKER_ID[T.business] 0.0000000054420 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MAKER_ID[T.r] MAKER_ID[T.n] MAKER_ID[T.d] MAKER_ID[T.p]
## MAKER_ID[T.r] -0.381
## MAKER_ID[T.n] -0.518 0.500
## MAKER_ID[T.d] -0.527 0.509 0.678
## MAKER_ID[T.p] -0.498 0.478 0.652 0.639
## MAKER_ID[T.b] -0.513 0.491 0.658 0.697 0.619
anova(m1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## MAKER_ID 45061 9012.2 5 1494.3 18.673 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance(m1)
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## -------------------------------------------------------------------------------------
## 14590.159 | 14590.273 | 14638.503 | 0.340 | 0.051 | 0.305 | 20.756 | 21.969
report(m1)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
## to predict MAKER_DATA with MAKER_ID (formula: MAKER_DATA ~ MAKER_ID). The model
## included PID as random effects (formula: list(~1 | PID, ~1 | STIMULUS)). The
## model's total explanatory power is substantial (conditional R2 = 0.34) and the
## part related to the fixed effects alone (marginal R2) is of 0.05. The model's
## intercept, corresponding to MAKER_ID = individual, is at 54.34 (95% CI [48.41,
## 60.27], t(1581) = 17.98, p < .001). Within this model:
##
## - The effect of MAKER ID[T.organization] is statistically significant and
## negative (beta = -7.97, 95% CI [-13.61, -2.33], t(1581) = -2.77, p = 0.006;
## Std. beta = -0.29, 95% CI [-0.49, -0.08])
## - The effect of MAKER ID[T.news] is statistically significant and negative
## (beta = -13.68, 95% CI [-18.33, -9.03], t(1581) = -5.77, p < .001; Std. beta =
## -0.49, 95% CI [-0.66, -0.33])
## - The effect of MAKER ID[T.education] is statistically significant and negative
## (beta = -20.13, 95% CI [-24.41, -15.86], t(1581) = -9.24, p < .001; Std. beta =
## -0.73, 95% CI [-0.88, -0.57])
## - The effect of MAKER ID[T.political] is statistically significant and negative
## (beta = -16.41, 95% CI [-21.29, -11.54], t(1581) = -6.61, p < .001; Std. beta =
## -0.59, 95% CI [-0.77, -0.42])
## - The effect of MAKER ID[T.business] is statistically significant and negative
## (beta = -13.46, 95% CI [-17.96, -8.96], t(1581) = -5.87, p < .001; Std. beta =
## -0.49, 95% CI [-0.65, -0.32])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
## PLOT MODEL COEFFICIENTS
coefs <- model_parameters(m1)
plot_model(m1, type = "est",
show.intercept = TRUE,
show.values = TRUE,
value.offset = .25,
show.p = TRUE
) + theme_minimal() + labs(caption=f)
## PLOT MODEL PREDICTIONS
means <- estimate_means(m1, at = c("MAKER_ID"))
# sjPlot::plot_model(m1, type = "pred", terms = c("MAKER_ID")) +
# theme_minimal() + labs(caption=f)
# plot(means) + theme_minimal() + labs(caption=f) +
# geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)),
# color="blue", position = position_nudge(x=0.25))
## PLOT MODEL PREDICTIONS with CONTRASTS
## contrasts
# black = estimated means and CI range; grey = CI range of the difference (as compared to the point estimate).
(contrasts <- estimate_contrasts(m1, contrast="MAKER_ID", method="pairwise"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | df | t | p
## --------------------------------------------------------------------------------------------
## education | business | -6.67 | [-11.80, -1.54] | 1.75 | 1450.53 | -3.82 | 0.001
## education | political | -3.72 | [ -9.61, 2.18] | 2.01 | 1503.73 | -1.85 | 0.256
## individual | business | 13.46 | [ 6.70, 20.22] | 2.30 | 1538.67 | 5.85 | < .001
## individual | education | 20.13 | [ 13.71, 26.55] | 2.18 | 1505.25 | 9.22 | < .001
## individual | news | 13.68 | [ 6.70, 20.66] | 2.38 | 1527.70 | 5.76 | < .001
## individual | organization | 7.97 | [ -0.50, 16.44] | 2.88 | 1524.06 | 2.77 | 0.040
## individual | political | 16.41 | [ 9.09, 23.73] | 2.49 | 1547.12 | 6.59 | < .001
## news | business | -0.22 | [ -5.91, 5.47] | 1.93 | 1477.69 | -0.11 | 0.909
## news | education | 6.45 | [ 1.05, 11.85] | 1.84 | 1457.70 | 3.51 | 0.004
## news | political | 2.74 | [ -3.23, 8.71] | 2.03 | 1487.65 | 1.35 | 0.476
## organization | business | 5.49 | [ -2.34, 13.31] | 2.66 | 1508.62 | 2.06 | 0.197
## organization | education | 12.16 | [ 4.57, 19.75] | 2.58 | 1508.15 | 4.71 | < .001
## organization | news | 5.71 | [ -2.12, 13.54] | 2.66 | 1499.60 | 2.14 | 0.193
## organization | political | 8.44 | [ 0.32, 16.57] | 2.76 | 1520.19 | 3.05 | 0.018
## political | business | -2.96 | [ -9.12, 3.21] | 2.10 | 1493.91 | -1.41 | 0.476
##
## Marginal contrasts estimated at MAKER_ID
## p-value adjustment method: Holm (1979)
plot(contrasts, means) +
geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)), color="blue", position = position_nudge(x=0.25)) +
theme_minimal() + labs(caption = f)
df <- df_graphs
## Does MAKER_DESIGN depend on MAKER ID?
##RIDGEPLOT w/ MEAN
answers <- levels(df$MAKER_ID)
left <- rep(ref_labels['MAKER_DESIGN','left'], length(levels(df$MAKER_ID)))
right <- rep(ref_labels['MAKER_DESIGN','right'], length(levels(df$MAKER_ID)))
df %>%
group_by(MAKER_ID) %>%
mutate(count = n()) %>%
ggplot(aes(y = fct_rev(MAKER_ID), x= MAKER_DESIGN, fill = fct_rev(MAKER_ID))) +
scale_x_continuous(limits = c(0,100))+
geom_density_ridges(scale = 0.55,quantile_lines = TRUE, alpha = 0.75) +
stat_pointinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
guides(
y = guide_axis_manual(labels = left, title = ""),
y.sec = guide_axis_manual(labels = right)
) +
geom_text(aes(label= paste0("n=",count) , y = MAKER_ID, x = 100), color = "black",size = 3, nudge_y = 0.25) +
cowplot::draw_text(text = toupper(answers), x = 0, y= answers, size = 10, vjust=-2, hjust=0) +
labs (title = "DESIGN COMPETENCY by MAKER ID", y = "", x = "MAKER DESIGN COMPETENCY", caption="(mean in blue)") +
theme_minimal() + easy_remove_legend()
## Picking joint bandwidth of 8.17
### LINEAR MIXED EFFECTS MODEL ##################
df <- df_graphs
## DEFINE MODEL
f <- "MAKER_DESIGN ~ MAKER_ID + (1|PID) + (1|STIMULUS)"
m1 <-lmer(MAKER_DESIGN ~ MAKER_ID + (1|PID) + (1|STIMULUS), data=df)
## PRINT MODEL
(m_eq <- extract_eq(m1, use_coef = TRUE, ital_vars = TRUE, coef_digits = 1, wrap = TRUE, intercept = "beta"))
\[ \begin{aligned} \widehat{MAKER\_DESIGN}_{i} &\sim N \left(62.5_{\alpha_{j[i],k[i]}}, \sigma^2 \right) \\ \alpha_{j} &\sim N \left(-15.7_{\gamma_{1}^{\alpha}}(MAKER\_ID_{organization}) - 23.9_{\gamma_{2}^{\alpha}}(MAKER\_ID_{news}) - 12.3_{\gamma_{3}^{\alpha}}(MAKER\_ID_{education}) - 20.2_{\gamma_{4}^{\alpha}}(MAKER\_ID_{political}) - 15.7_{\gamma_{5}^{\alpha}}(MAKER\_ID_{business}), 8.3 \right) \text{, for PID j = 1,} \dots \text{,J} \\ \alpha_{k} &\sim N \left(0, 11.8 \right) \text{, for STIMULUS k = 1,} \dots \text{,K} \end{aligned} \]
## DESCRIBE MODEL
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: MAKER_DESIGN ~ MAKER_ID + (1 | PID) + (1 | STIMULUS)
## Data: df
##
## REML criterion at convergence: 14710.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2704 -0.6818 -0.0276 0.6768 2.5092
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 68.24 8.261
## STIMULUS (Intercept) 139.11 11.794
## Residual 539.31 23.223
## Number of obs: 1590, groups: PID, 318; STIMULUS, 25
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 62.526 3.125 61.610 20.009 < 0.0000000000000002
## MAKER_IDorganization -15.661 3.018 1539.762 -5.189 0.0000002390642455
## MAKER_IDnews -23.929 2.486 1543.472 -9.625 < 0.0000000000000002
## MAKER_IDeducation -12.298 2.288 1522.194 -5.374 0.0000000888493856
## MAKER_IDpolitical -20.195 2.603 1561.314 -7.757 0.0000000000000156
## MAKER_IDbusiness -15.683 2.405 1554.156 -6.520 0.0000000000946328
##
## (Intercept) ***
## MAKER_IDorganization ***
## MAKER_IDnews ***
## MAKER_IDeducation ***
## MAKER_IDpolitical ***
## MAKER_IDbusiness ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MAKER_IDr MAKER_IDn MAKER_IDd MAKER_IDp
## MAKER_IDrgn -0.386
## MAKER_IDnws -0.524 0.500
## MAKER_IDdct -0.534 0.509 0.676
## MAKER_IDplt -0.505 0.477 0.651 0.638
## MAKER_IDbsn -0.519 0.490 0.656 0.696 0.618
anova(m1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## MAKER_ID 55394 11079 5 1511.8 20.542 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance(m1)
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## -------------------------------------------------------------------------------------
## 14728.725 | 14728.839 | 14777.069 | 0.323 | 0.063 | 0.278 | 22.104 | 23.223
report(m1)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
## to predict MAKER_DESIGN with MAKER_ID (formula: MAKER_DESIGN ~ MAKER_ID). The
## model included PID as random effects (formula: list(~1 | PID, ~1 | STIMULUS)).
## The model's total explanatory power is substantial (conditional R2 = 0.32) and
## the part related to the fixed effects alone (marginal R2) is of 0.06. The
## model's intercept, corresponding to MAKER_ID = individual, is at 62.53 (95% CI
## [56.40, 68.65], t(1581) = 20.01, p < .001). Within this model:
##
## - The effect of MAKER ID [organization] is statistically significant and
## negative (beta = -15.66, 95% CI [-21.58, -9.74], t(1581) = -5.19, p < .001;
## Std. beta = -0.55, 95% CI [-0.76, -0.34])
## - The effect of MAKER ID [news] is statistically significant and negative (beta
## = -23.93, 95% CI [-28.81, -19.05], t(1581) = -9.62, p < .001; Std. beta =
## -0.84, 95% CI [-1.02, -0.67])
## - The effect of MAKER ID [education] is statistically significant and negative
## (beta = -12.30, 95% CI [-16.79, -7.81], t(1581) = -5.37, p < .001; Std. beta =
## -0.43, 95% CI [-0.59, -0.28])
## - The effect of MAKER ID [political] is statistically significant and negative
## (beta = -20.19, 95% CI [-25.30, -15.09], t(1581) = -7.76, p < .001; Std. beta =
## -0.71, 95% CI [-0.89, -0.53])
## - The effect of MAKER ID [business] is statistically significant and negative
## (beta = -15.68, 95% CI [-20.40, -10.96], t(1581) = -6.52, p < .001; Std. beta =
## -0.55, 95% CI [-0.72, -0.39])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
## PLOT MODEL COEFFICIENTS
coefs <- model_parameters(m1)
plot_model(m1, type = "est",
show.intercept = TRUE,
show.values = TRUE,
show.p = TRUE
) + theme_minimal() + labs(caption=f)
## PLOT MODEL PREDICTIONS
means <- estimate_means(m1, at = c("MAKER_ID"))
# sjPlot::plot_model(m1, type = "pred", terms = c("MAKER_ID")) +
# theme_minimal() + labs(caption=f)
# plot(means) + theme_minimal() + labs(caption=f) +
# geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)),
# color="blue", position = position_nudge(x=0.25))
## PLOT MODEL PREDICTIONS with CONTRASTS
## contrasts
# black = estimated means and CI range; grey = CI range of the difference (as compared to the point estimate).
(contrasts <- estimate_contrasts(m1, contrast="MAKER_ID", method="pairwise"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | df | t | p
## -----------------------------------------------------------------------------------------------
## education | business | 3.38 | [ -2.01, 8.78] | 1.84 | 1468.49 | 1.84 | 0.327
## education | political | 7.90 | [ 1.71, 14.09] | 2.11 | 1521.55 | 3.75 | 0.001
## individual | business | 15.68 | [ 8.60, 22.77] | 2.41 | 1554.59 | 6.51 | < .001
## individual | education | 12.30 | [ 5.56, 19.04] | 2.29 | 1523.08 | 5.36 | < .001
## individual | news | 23.93 | [ 16.60, 31.26] | 2.49 | 1543.93 | 9.60 | < .001
## individual | organization | 15.66 | [ 6.78, 24.55] | 3.02 | 1540.54 | 5.18 | < .001
## individual | political | 20.19 | [ 12.52, 27.87] | 2.61 | 1561.56 | 7.74 | < .001
## news | business | -8.25 | [-14.22, -2.27] | 2.03 | 1496.07 | -4.06 | < .001
## news | education | -11.63 | [-17.31, -5.95] | 1.93 | 1475.26 | -6.02 | < .001
## news | political | -3.73 | [-10.01, 2.54] | 2.13 | 1506.23 | -1.75 | 0.327
## organization | business | 0.02 | [ -8.19, 8.23] | 2.79 | 1526.44 | 7.63e-03 | 0.994
## organization | education | -3.36 | [-11.32, 4.60] | 2.71 | 1525.72 | -1.24 | 0.429
## organization | news | 8.27 | [ 0.05, 16.48] | 2.79 | 1517.53 | 2.96 | 0.022
## organization | political | 4.53 | [ -3.99, 13.06] | 2.90 | 1537.12 | 1.56 | 0.355
## political | business | -4.51 | [-10.99, 1.96] | 2.20 | 1512.04 | -2.05 | 0.244
##
## Marginal contrasts estimated at MAKER_ID
## p-value adjustment method: Holm (1979)
plot(contrasts, means) +
geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)), color="blue", position = position_nudge(x=0.25)) +
theme_minimal() + labs(caption = f)
maker_design, chart_like, chart_beauty for BOOMER vs. others
maker_data for gen Z vs others
maker-data for FEMALE
maker data for design-basic, interesting pattern
look closer at chart beauty
interesting pattern across values on chart intent
— no difference at ALL in maker data - interesting! - chart beauty very diff - chart intent intersting bimodal
df <- df_graphs
## Does MAKER POLITICS depend on MAKER ID?
##RIDGEPLOT w/ MEAN
answers <- levels(df$MAKER_ID)
left <- rep(ref_labels['MAKER_POLITIC','left'], length(levels(df$MAKER_ID)))
right <- rep(ref_labels['MAKER_POLITIC','right'], length(levels(df$MAKER_ID)))
df %>%
group_by(MAKER_ID) %>%
mutate(count = n()) %>%
ggplot(aes(y = fct_rev(MAKER_ID), x= MAKER_POLITIC, fill = fct_rev(MAKER_ID))) +
scale_x_continuous(limits = c(0,100))+
geom_density_ridges(scale = 0.55,quantile_lines = TRUE, alpha = 0.75) +
stat_pointinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
guides(
y = guide_axis_manual(labels = left, title = ""),
y.sec = guide_axis_manual(labels = right)
) +
geom_text(aes(label= paste0("n=",count) , y = MAKER_ID, x = 100), color = "black",size = 3, nudge_y = 0.25) +
cowplot::draw_text(text = toupper(answers), x = 0, y= answers, size = 10, vjust=-2, hjust=0) +
labs (title = "POLITICS by MAKER ID", y = "", x = "MAKER POLITICS", caption="(mean in blue)") +
theme_minimal() + easy_remove_legend()
### LINEAR MIXED EFFECTS MODEL ##################
df <- df_graphs
## DEFINE MODEL
f <- "MAKER_POLITIC ~ MAKER_ID + (1|PID) + (1|STIMULUS)"
m1 <-lmer(MAKER_POLITIC ~ MAKER_ID + (1|PID) + (1|STIMULUS), data=df)
## PRINT MODEL
(m_eq <- extract_eq(m1, use_coef = TRUE, ital_vars = TRUE, coef_digits = 1, wrap = TRUE, intercept = "beta"))
\[ \begin{aligned} \widehat{MAKER\_POLITIC}_{i} &\sim N \left(47.9_{\alpha_{j[i],k[i]}}, \sigma^2 \right) \\ \alpha_{j} &\sim N \left(1.9_{\gamma_{1}^{\alpha}}(MAKER\_ID_{organization}) - 1.1_{\gamma_{2}^{\alpha}}(MAKER\_ID_{news}) + 0.2_{\gamma_{3}^{\alpha}}(MAKER\_ID_{education}) + 4.8_{\gamma_{4}^{\alpha}}(MAKER\_ID_{political}) + 4.6_{\gamma_{5}^{\alpha}}(MAKER\_ID_{business}), 4.8 \right) \text{, for PID j = 1,} \dots \text{,J} \\ \alpha_{k} &\sim N \left(0, 7 \right) \text{, for STIMULUS k = 1,} \dots \text{,K} \end{aligned} \]
## DESCRIBE MODEL
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: MAKER_POLITIC ~ MAKER_ID + (1 | PID) + (1 | STIMULUS)
## Data: df
##
## REML criterion at convergence: 13415.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6063 -0.4780 0.0128 0.4581 3.7978
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 23.06 4.802
## STIMULUS (Intercept) 48.57 6.969
## Residual 243.87 15.616
## Number of obs: 1590, groups: PID, 318; STIMULUS, 25
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 47.9057 1.9454 73.4244 24.625
## MAKER_IDorganization 1.9436 2.0127 1555.7185 0.966
## MAKER_IDnews -1.0792 1.6563 1554.8284 -0.652
## MAKER_IDeducation 0.1855 1.5265 1539.3043 0.122
## MAKER_IDpolitical 4.7547 1.7328 1569.5819 2.744
## MAKER_IDbusiness 4.6068 1.6021 1566.8878 2.876
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## MAKER_IDorganization 0.33434
## MAKER_IDnews 0.51478
## MAKER_IDeducation 0.90330
## MAKER_IDpolitical 0.00614 **
## MAKER_IDbusiness 0.00409 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MAKER_IDr MAKER_IDn MAKER_IDd MAKER_IDp
## MAKER_IDrgn -0.413
## MAKER_IDnws -0.560 0.498
## MAKER_IDdct -0.571 0.508 0.675
## MAKER_IDplt -0.539 0.476 0.649 0.637
## MAKER_IDbsn -0.555 0.489 0.654 0.694 0.616
anova(m1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## MAKER_ID 7877.9 1575.6 5 1528.9 6.4606 0.000005947 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance(m1)
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## -------------------------------------------------------------------------------------
## 13433.639 | 13433.753 | 13481.982 | 0.241 | 0.018 | 0.227 | 14.973 | 15.616
report(m1)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
## to predict MAKER_POLITIC with MAKER_ID (formula: MAKER_POLITIC ~ MAKER_ID). The
## model included PID as random effects (formula: list(~1 | PID, ~1 | STIMULUS)).
## The model's total explanatory power is moderate (conditional R2 = 0.24) and the
## part related to the fixed effects alone (marginal R2) is of 0.02. The model's
## intercept, corresponding to MAKER_ID = individual, is at 47.91 (95% CI [44.09,
## 51.72], t(1581) = 24.62, p < .001). Within this model:
##
## - The effect of MAKER ID [organization] is statistically non-significant and
## positive (beta = 1.94, 95% CI [-2.00, 5.89], t(1581) = 0.97, p = 0.334; Std.
## beta = 0.10, 95% CI [-0.11, 0.31])
## - The effect of MAKER ID [news] is statistically non-significant and negative
## (beta = -1.08, 95% CI [-4.33, 2.17], t(1581) = -0.65, p = 0.515; Std. beta =
## -0.06, 95% CI [-0.23, 0.12])
## - The effect of MAKER ID [education] is statistically non-significant and
## positive (beta = 0.19, 95% CI [-2.81, 3.18], t(1581) = 0.12, p = 0.903; Std.
## beta = 9.91e-03, 95% CI [-0.15, 0.17])
## - The effect of MAKER ID [political] is statistically significant and positive
## (beta = 4.75, 95% CI [1.36, 8.15], t(1581) = 2.74, p = 0.006; Std. beta = 0.25,
## 95% CI [0.07, 0.44])
## - The effect of MAKER ID [business] is statistically significant and positive
## (beta = 4.61, 95% CI [1.46, 7.75], t(1581) = 2.88, p = 0.004; Std. beta = 0.25,
## 95% CI [0.08, 0.41])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
## PLOT MODEL COEFFICIENTS
coefs <- model_parameters(m1)
plot_model(m1, type = "est",
show.intercept = TRUE,
show.values = TRUE,
show.p = TRUE
) + theme_minimal() + labs(caption=f)
## PLOT MODEL PREDICTIONS
means <- estimate_means(m1, at = c("MAKER_ID"))
# sjPlot::plot_model(m1, type = "pred", terms = c("MAKER_ID")) +
# theme_minimal() + labs(caption=f)
# plot(means) + theme_minimal() + labs(caption=f) +
# geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)),
# color="blue", position = position_nudge(x=0.25))
## PLOT MODEL PREDICTIONS with CONTRASTS
## contrasts
# black = estimated means and CI range; grey = CI range of the difference (as compared to the point estimate).
(contrasts <- estimate_contrasts(m1, contrast="MAKER_ID", method="pairwise"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | df | t | p
## --------------------------------------------------------------------------------------------
## education | business | -4.42 | [ -8.03, -0.81] | 1.23 | 1486.57 | -3.60 | 0.004
## education | political | -4.57 | [ -8.70, -0.44] | 1.40 | 1536.05 | -3.25 | 0.014
## individual | business | -4.61 | [ -9.33, 0.11] | 1.61 | 1566.73 | -2.87 | 0.046
## individual | education | -0.19 | [ -4.68, 4.31] | 1.53 | 1539.01 | -0.12 | > .999
## individual | news | 1.08 | [ -3.80, 5.96] | 1.66 | 1554.41 | 0.65 | > .999
## individual | organization | -1.94 | [ -7.87, 3.98] | 2.02 | 1555.71 | -0.96 | > .999
## individual | political | -4.75 | [ -9.86, 0.35] | 1.74 | 1569.33 | -2.74 | 0.063
## news | business | -5.69 | [ -9.68, -1.69] | 1.36 | 1514.15 | -4.19 | < .001
## news | education | -1.26 | [ -5.06, 2.53] | 1.29 | 1493.05 | -0.98 | > .999
## news | political | -5.83 | [-10.02, -1.65] | 1.42 | 1524.71 | -4.10 | < .001
## organization | business | -2.66 | [ -8.14, 2.82] | 1.86 | 1543.38 | -1.43 | > .999
## organization | education | 1.76 | [ -3.56, 7.07] | 1.81 | 1542.04 | 0.97 | > .999
## organization | news | 3.02 | [ -2.46, 8.51] | 1.87 | 1534.65 | 1.62 | 0.949
## organization | political | -2.81 | [ -8.50, 2.88] | 1.93 | 1552.83 | -1.45 | > .999
## political | business | 0.15 | [ -4.17, 4.47] | 1.47 | 1526.57 | 0.10 | > .999
##
## Marginal contrasts estimated at MAKER_ID
## p-value adjustment method: Holm (1979)
plot(contrasts, means) +
geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)), color="blue", position = position_nudge(x=0.25)) +
theme_minimal() + labs(caption = f)
df <- df_graphs
## Does MAKER_TRUST depend on MAKER ID?
##RIDGEPLOT w/ MEAN
answers <- levels(df$MAKER_ID)
left <- rep(ref_labels['MAKER_TRUST','left'], length(levels(df$MAKER_ID)))
right <- rep(ref_labels['MAKER_TRUST','right'], length(levels(df$MAKER_ID)))
df %>%
group_by(MAKER_ID) %>%
mutate(count = n()) %>%
ggplot(aes(y = fct_rev(MAKER_ID), x= MAKER_TRUST, fill = fct_rev(MAKER_ID))) +
scale_x_continuous(limits = c(0,100))+
geom_density_ridges(scale = 0.55,quantile_lines = TRUE, alpha = 0.75) +
stat_pointinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
vjust=+2, hjust = 0, aes( label=round(..x.., digits=0)))+
stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
guides(
y = guide_axis_manual(labels = left, title = ""),
y.sec = guide_axis_manual(labels = right)
) +
geom_text(aes(label= paste0("n=",count) , y = MAKER_ID, x = 100), color = "black",size = 3, nudge_y = 0.25) +
cowplot::draw_text(text = toupper(answers), x = 0, y= answers, size = 10, vjust=-2, hjust=0) +
labs (title = "MAKER TRUST COMPETENCY by MAKER ID", y = "", x = "MAKER TRUST", caption="(mean in blue)") +
theme_minimal() + easy_remove_legend()
## Picking joint bandwidth of 4.55
### LINEAR MIXED EFFECTS MODEL ##################
df <- df_graphs
## DEFINE MODEL
f <- "MAKER_TRUST ~ MAKER_ID + (1|PID) + (1|STIMULUS)"
m1 <-lmer(MAKER_TRUST ~ MAKER_ID + (1|PID) + (1|STIMULUS), data=df)
## PRINT MODEL
(m_eq <- extract_eq(m1, use_coef = TRUE, ital_vars = TRUE, coef_digits = 1, wrap = TRUE, intercept = "beta"))
\[ \begin{aligned} \widehat{MAKER\_TRUST}_{i} &\sim N \left(52.2_{\alpha_{j[i],k[i]}}, \sigma^2 \right) \\ \alpha_{j} &\sim N \left(5_{\gamma_{1}^{\alpha}}(MAKER\_ID_{organization}) + 6.4_{\gamma_{2}^{\alpha}}(MAKER\_ID_{news}) + 11.7_{\gamma_{3}^{\alpha}}(MAKER\_ID_{education}) + 1.6_{\gamma_{4}^{\alpha}}(MAKER\_ID_{political}) + 1.6_{\gamma_{5}^{\alpha}}(MAKER\_ID_{business}), 7.1 \right) \text{, for PID j = 1,} \dots \text{,J} \\ \alpha_{k} &\sim N \left(0, 5.5 \right) \text{, for STIMULUS k = 1,} \dots \text{,K} \end{aligned} \]
## DESCRIBE MODEL
summary(m1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: MAKER_TRUST ~ MAKER_ID + (1 | PID) + (1 | STIMULUS)
## Data: df
##
## REML criterion at convergence: 13527.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4861 -0.5306 -0.0062 0.5833 2.7640
##
## Random effects:
## Groups Name Variance Std.Dev.
## PID (Intercept) 49.79 7.056
## STIMULUS (Intercept) 30.35 5.509
## Residual 247.90 15.745
## Number of obs: 1590, groups: PID, 318; STIMULUS, 25
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 52.232 1.795 108.126 29.106 < 0.0000000000000002
## MAKER_IDorganization 5.030 2.068 1518.206 2.432 0.015118
## MAKER_IDnews 6.375 1.693 1485.447 3.765 0.000173
## MAKER_IDeducation 11.706 1.560 1484.176 7.506 0.000000000000105
## MAKER_IDpolitical 1.633 1.776 1503.328 0.919 0.358119
## MAKER_IDbusiness 1.622 1.642 1512.352 0.988 0.323356
##
## (Intercept) ***
## MAKER_IDorganization *
## MAKER_IDnews ***
## MAKER_IDeducation ***
## MAKER_IDpolitical
## MAKER_IDbusiness
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) MAKER_IDr MAKER_IDn MAKER_IDd MAKER_IDp
## MAKER_IDrgn -0.458
## MAKER_IDnws -0.622 0.498
## MAKER_IDdct -0.635 0.508 0.677
## MAKER_IDplt -0.599 0.475 0.650 0.639
## MAKER_IDbsn -0.617 0.490 0.657 0.695 0.620
anova(m1)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## MAKER_ID 25811 5162.2 5 1479.5 20.824 < 0.00000000000000022 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance(m1)
## # Indices of model performance
##
## AIC | AICc | BIC | R2 (cond.) | R2 (marg.) | ICC | RMSE | Sigma
## -------------------------------------------------------------------------------------
## 13545.852 | 13545.966 | 13594.195 | 0.285 | 0.054 | 0.244 | 14.807 | 15.745
report(m1)
## We fitted a linear mixed model (estimated using REML and nloptwrap optimizer)
## to predict MAKER_TRUST with MAKER_ID (formula: MAKER_TRUST ~ MAKER_ID). The
## model included PID as random effects (formula: list(~1 | PID, ~1 | STIMULUS)).
## The model's total explanatory power is substantial (conditional R2 = 0.29) and
## the part related to the fixed effects alone (marginal R2) is of 0.05. The
## model's intercept, corresponding to MAKER_ID = individual, is at 52.23 (95% CI
## [48.71, 55.75], t(1581) = 29.11, p < .001). Within this model:
##
## - The effect of MAKER ID [organization] is statistically significant and
## positive (beta = 5.03, 95% CI [0.97, 9.09], t(1581) = 2.43, p = 0.015; Std.
## beta = 0.27, 95% CI [0.05, 0.49])
## - The effect of MAKER ID [news] is statistically significant and positive (beta
## = 6.38, 95% CI [3.05, 9.70], t(1581) = 3.76, p < .001; Std. beta = 0.34, 95% CI
## [0.16, 0.52])
## - The effect of MAKER ID [education] is statistically significant and positive
## (beta = 11.71, 95% CI [8.65, 14.77], t(1581) = 7.51, p < .001; Std. beta =
## 0.63, 95% CI [0.46, 0.79])
## - The effect of MAKER ID [political] is statistically non-significant and
## positive (beta = 1.63, 95% CI [-1.85, 5.12], t(1581) = 0.92, p = 0.358; Std.
## beta = 0.09, 95% CI [-0.10, 0.27])
## - The effect of MAKER ID [business] is statistically non-significant and
## positive (beta = 1.62, 95% CI [-1.60, 4.84], t(1581) = 0.99, p = 0.323; Std.
## beta = 0.09, 95% CI [-0.09, 0.26])
##
## Standardized parameters were obtained by fitting the model on a standardized
## version of the dataset. 95% Confidence Intervals (CIs) and p-values were
## computed using a Wald t-distribution approximation.
## PLOT MODEL COEFFICIENTS
coefs <- model_parameters(m1)
plot_model(m1, type = "est",
show.intercept = TRUE,
show.values = TRUE,
show.p = TRUE
) + theme_minimal() + labs(caption=f)
## PLOT MODEL PREDICTIONS
means <- estimate_means(m1, at = c("MAKER_ID"))
# sjPlot::plot_model(m1, type = "pred", terms = c("MAKER_ID")) +
# theme_minimal() + labs(caption=f)
# plot(means) + theme_minimal() + labs(caption=f) +
# geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)),
# color="blue", position = position_nudge(x=0.25))
## PLOT MODEL PREDICTIONS with CONTRASTS
## contrasts
# black = estimated means and CI range; grey = CI range of the difference (as compared to the point estimate).
(contrasts <- estimate_contrasts(m1, contrast="MAKER_ID", method="pairwise"))
## Marginal Contrasts Analysis
##
## Level1 | Level2 | Difference | 95% CI | SE | df | t | p
## -----------------------------------------------------------------------------------------------
## education | business | 10.08 | [ 6.40, 13.77] | 1.25 | 1444.24 | 8.05 | < .001
## education | political | 10.07 | [ 5.86, 14.29] | 1.43 | 1474.08 | 7.02 | < .001
## individual | business | -1.62 | [ -6.46, 3.22] | 1.65 | 1513.27 | -0.98 | > .999
## individual | education | -11.71 | [-16.30, -7.11] | 1.56 | 1485.07 | -7.49 | < .001
## individual | news | -6.38 | [-11.37, -1.38] | 1.70 | 1487.04 | -3.75 | 0.002
## individual | organization | -5.03 | [-11.12, 1.06] | 2.07 | 1518.40 | -2.43 | 0.107
## individual | political | -1.63 | [ -6.87, 3.61] | 1.78 | 1504.82 | -0.92 | > .999
## news | business | 4.75 | [ 0.68, 8.83] | 1.39 | 1465.35 | 3.43 | 0.006
## news | education | -5.33 | [ -9.20, -1.46] | 1.32 | 1447.18 | -4.05 | < .001
## news | political | 4.74 | [ 0.46, 9.02] | 1.46 | 1479.72 | 3.26 | 0.009
## organization | business | 3.41 | [ -2.22, 9.03] | 1.91 | 1502.81 | 1.78 | 0.450
## organization | education | -6.68 | [-12.13, -1.22] | 1.86 | 1501.53 | -3.60 | 0.003
## organization | news | -1.35 | [ -6.97, 4.28] | 1.91 | 1493.53 | -0.70 | > .999
## organization | political | 3.40 | [ -2.45, 9.24] | 1.99 | 1514.87 | 1.71 | 0.450
## political | business | 0.01 | [ -4.40, 4.42] | 1.50 | 1460.93 | 6.82e-03 | > .999
##
## Marginal contrasts estimated at MAKER_ID
## p-value adjustment method: Holm (1979)
plot(contrasts, means) +
geom_text(aes(x=means$MAKER_ID, y=means$Mean, label=round(means$Mean,1)), color="blue", position = position_nudge(x=0.25)) +
theme_minimal() + labs(caption = f)
wip code stash
#
# ## [test-frame] Are the confidence scores significantly different for different questions?
# ## [model-frame] Does QUESTION predict CONFIDENCE, accounting for random variance in SUBJECT and STIMULUS?
#
#
# ## MIXED model with random variance only at subject (not stimulus)
# mm1 <- lmer( CONFIDENCE ~ QUESTION + (1|PID), data = df)
# # summary(mm1)
# # plot(check_model(mm1))
# # pm <- model_parameters(mm1)
# # plot(pm, show_labels = TRUE, show_intercept = TRUE) + labs(title = "CONFIDENCE ~ QUESTION + (1|PID)")
# # performance(mm1)
# # report(mm1)
#
#
# ## MIXED model with random variance only at subject AND stimulus
# mm2 <- lmer( CONFIDENCE ~ QUESTION + (1|PID) + (1|STIMULUS), data = df)
# # summary(mm2)
# # plot(check_model(mm2))
# # pm <- model_parameters(mm2)
# # plot_model(mm2)
# # plot(pm, show_labels = TRUE, show_intercept = TRUE) + labs(title = "CONFIDENCE ~ QUESTION + (1|PID) + (1|STIMULUS)")
# # performance(mm2)
# # report(mm2)
#
#
# ## MIXED model with random slope for question by person and random intercept by stimulus
# mm3 <- lmer( CONFIDENCE ~ QUESTION + (1 + QUESTION | PID) + (1|STIMULUS), data = df)
# # summary(mm3)
# # plot(check_model(mm3))
# # pm <- model_parameters(mm3)
# # plot(pm, show_labels = TRUE, show_intercept = TRUE) + labs(title = "CONFIDENCE ~ QUESTION + (1 + QUESTION | PID) + (1|STIMULUS)")
# # performance(mm3)
# # report(mm3)
#
#
# ## MIXED model with STIMULUS as FIXED effect and random intercept by person
# mm4 <- lmer( CONFIDENCE ~ QUESTION + STIMULUS + (1 | PID), data = df)
# # summary(mm4)
# # plot(check_model(mm4))
# # pm <- model_parameters(mm4)
# # plot(pm, show_labels = TRUE, show_intercept = FALSE) + labs(title = "CONFIDENCE ~ QUESTION + STIMULUS + (1 | PID)")
# # performance(mm4)
# # report(mm4)
#
# ## MIXED model with STIMULUS * QUESTION interaction and random intercept by person
# mm5 <- lmer( CONFIDENCE ~ QUESTION * STIMULUS + (1 | PID), data = df)
# # summary(mm5)
# # plot(check_model(mm5))
# # pm <- model_parameters(mm5)
# # plot(pm, show_labels = TRUE, show_intercept = FALSE) + labs(title = "CONFIDENCE ~ QUESTION * STIMULUS + (1 | PID)")
# # performance(mm5)
# # report(mm5)
#
#
# ## MIXED model with STIMULUS * QUESTION interaction and random intercept by person
# mmx <- lmer( CONFIDENCE ~ STIMULUS + (1 | PID) + (1 | QUESTION), data = df)
# # summary(mmx)
# # plot(check_model(mmx))
# # pm <- model_parameters(mmx)
# # plot(pm, show_labels = TRUE, show_intercept = FALSE) + labs(title = "CONFIDENCE ~ STIMULUS + (1 | PID) + (1 | QUESTION)")
# # performance(mmx)
# # report(mmx)
#
#
# ### COMPARE MODELS
# # compare_parameters(mm1,mm2,mm3, mm4, mm5, mmx)
# compare_performance(mm1,mm2,mm3, mm4, mm5, mmx, rank = TRUE )
# ## model 3 is the best fit, and is appropriate to the design of the study
# summary(mm3)
# report(mm3)
# # plot_model(mm3, terms = c("QUESTION", "STIMULUS"), type = "diag")
#
# # # ## repeated measures aov
# # print("Repeated Measures ANOVA")
# # ex1 <- aov(CONFIDENCE~QUESTION+Error(PID), data=df)
# # summary(ex1)
# # report(ex1)
#
# ## SHADED CIRCLES
# corrplot(m, method = 'circle', type = 'lower',
# order = 'AOE', diag = FALSE,
# insig='blank',
# tl.col = "black")
#
#
# ## SHADED NUMBERS
# corrplot(m, order = 'AOE', method = "number",
# diag = FALSE, type = "lower",
# insig='blank',
# # insig = 'label_sig', sig.level = c(0.001, 0.01, 0.05),
# addCoef.col = '#595D60',
# tl.pos = "ld", tl.col = "#595D60")
#
#
# ## SHADED SQUARED + COEFFS
# corrplot(m, order = 'AOE', method = "circle",
# diag = FALSE, type = "lower",
# insig='blank', sig.level = 0.05,
# # insig = 'label_sig', sig.level = c(0.001, 0.01, 0.05),
# addCoef.col = '#595D60',
# tl.pos = "ld", tl.col = "#595D60")
#
############## SETUP FOR FLIPPING SCALES ON SOME QUESTIONS TO MAKE THEM MORE READABLE
ref_sd_reordered <- c("MAKER_DATA","MAKER_DESIGN",
"CHART_BEAUTY", "CHART_LIKE",
"MAKER_POLITIC","MAKER_ARGUE", "MAKER_SELF", "CHART_INTENT",
"MAKER_ALIGN","MAKER_TRUST",
"CHART_TRUST")
left_reordered <- c("layperson","layperson",
"NOT at all","NOT at all",
"left-leaning",
"diplomatic",
"altruistic",
"inform",
"DOES share",
"untrustworthy",
"untrustworthy")
right_reordered <- c("professional","professional",
"very much", "very much",
"right-leaning",
"confrontational",
"selfish",
"persuade",
"does NOT share",
"trustworthy",
"trusthworthy")
ref_labels_reordered <- as.data.frame(cbind(left_reordered,right_reordered))
rownames(ref_labels_reordered) <- ref_sd_questions
## GGALLY correlation heatmap
# ggcorr(df,
# label = TRUE, geom = "tile",
# nbreaks = 5, layout.exp = 2,
# # label_round = 2,
# angle = -0, hjust = 0.8, vjust = 1, size = 2.5,
# low = "#D88585",mid = "white", high= "#6DA0D6") +
# easy_remove_legend() +
# labs(title = "Correlation between SD measures", subtitle = ("pairwise; Pearson correlations"))
# ## Does MAKER_TRUST depend on MAKER ID?
# ##RIDGEPLOT w/ MEAN
# answers <- levels(df$MAKER_ID)
# left <- rep(ref_labels['MAKER_TRUST','left'], length(levels(df$MAKER_ID)))
# right <- rep(ref_labels['MAKER_TRUST','right'], length(levels(df$MAKER_ID)))
# df %>% ggplot(aes(y = fct_rev(MAKER_ID), x= MAKER_TRUST, fill = fct_rev(MAKER_ID))) +
# geom_density_ridges(scale = 0.55,quantile_lines = TRUE, alpha = 0.75) +
# stat_dotsinterval(side = "bottom", scale = 0.7, slab_linewidth = NA, point_interval = "mean_qi") +
# stat_summary(fun=mean, geom="text", colour="blue", fontface = "bold",
# vjust=+2, hjust = 0, aes( label=round(..x.., digits=0)))+
# stat_summary(fun="mean", geom="point", shape=20, size=5, color="blue", fill="blue") +
# scale_fill_manual(values = my_palettes(name="reds", direction = "-1"), name = "", guide = guide_legend(reverse = TRUE)) +
# guides(
# y = guide_axis_manual(labels = left, title = ""),
# y.sec = guide_axis_manual(labels = right)
# ) +
# cowplot::draw_text(text = toupper(answers), x = 10, y= answers,size = 10, vjust=-2) +
# labs (title = "MAKER TRUST by MAKER ID", y = "", x = "MAKER TRUST", caption="(mean in blue)") +
# theme_minimal() + easy_remove_legend()
##good for seeing the color schemes
# #### DEFINE SET
# stimulus = "B2-1"
# df <- df_graphs %>% filter(STIMULUS == stimulus)
#
# #### GENERATE GRAPHS
#
# #MAKER_ID-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "reds",
# main = paste0(stimulus, " MAKER ID")) + theme_minimal()
#
#
# #MAKER_GENDER-DONUT
# PieChart(MAKER_GENDER, data = df,
# fill = "blues",
# main = paste0(stimulus, " MAKER GENDER")) + theme_minimal()
#
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_AGE, data = df,
# fill = "olives",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "rusts",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "olives",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "greens",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "emeralds",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "turquoises",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "aquas",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-MAKER_ID
# PieChart(MAKER_ID, data = df,
# fill = "purples",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "magentas",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "violets",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
#
# #MAKER_AGE-DONUT
# PieChart(MAKER_ID, data = df,
# fill = "grays",
# main = paste0(stimulus, " MAKER AGE")) + theme_minimal()
# "reds" h 0
# "rusts" h 30
# "browns" h 60
# "olives" h 90
# "greens" h 120
# "emeralds" h 150
# "turquoises" h 180
# "aquas" h 210
# "blues" h 240
# "purples" h 270
# "violets" h 300
# "magentas" h 330
# "grays"
# df <- df_graphs %>% filter(STIMULUS== s)
# #### CATEGORICAL DONUT PLOTS
# #subset data cols
# cols <- df %>% select( all_of(ref_cat_questions))
#
# ggplot( df, aes( x = STIMULUS, fill = MAKER_ID)) +
# geom_bar( position = "stack", width=1) +
# coord_radial(theta = "y", start = 0, inner.radius = 0.5, expand=FALSE) +
# scale_fill_manual(values = my_palettes(name="reds", direction = "1"), name = "", guide = guide_legend(reverse = FALSE)) +
# labs( title = paste0(s, " MAKER ID")) +
# theme_minimal()
#
#
## EXAMPLE ALLUVIAL PLOT USING GGALUVIAL (instead of GGSANKEY)
# https://corybrunson.github.io/ggalluvial/articles/ggalluvial.html
# #FILTER FOR BLOCK 2 STIM AND RESHAPE FOR SANKEY
# ds <- df_graphs %>%
# filter(str_detect(STIMULUS, "B2")) %>%
# select(STIMULUS, MAKER_ID, PID) %>%
# mutate(
# MAKER_ID = fct_relevel(MAKER_ID,
# c("business","education","individual", "news","organization", "political" ))
# )
#
# ds %>%
# ggplot(aes( x = STIMULUS,
# stratum = MAKER_ID,
# label = MAKER_ID,
# alluvium = PID)) +
# stat_alluvium(aes(fill = MAKER_ID),
# width = 0,
# alpha = 1,
# geom = "flow")+
# geom_stratum(width = 0.2, aes(fill= MAKER_ID))+
# # geom_text(stat = "stratum", size = 5, angle = 90)+
# scale_fill_viridis(discrete=TRUE, option="viridis", drop = FALSE,
# alpha = 1) +
# theme_minimal()